# Clear the global environment so the script starts from a clean slate.
# NOTE(review): rm(list = ls()) only removes objects from the global
# environment; attached packages and options() are left as they are.
rm(list = ls())

# Set directories. All later reads and writes are built from these paths.
WORK.DIRECTORY <- "Z:/user/rih4/Solar Siting/JAERE Code Repository"
DATA.DIRECTORY <- file.path(WORK.DIRECTORY, "Data JAERE")
WORK.OUT <- file.path(DATA.DIRECTORY, "intermediate_output")
PLOT.OUT <- file.path(DATA.DIRECTORY, "final_output")

# Create the output folders only when they are missing, so re-running the
# script does not emit "already exists" warnings. recursive = TRUE is used for
# both so that a missing parent folder is created as well; a genuine failure
# (e.g. no write permission) still produces dir.create's own warning.
if (!dir.exists(WORK.OUT)) dir.create(WORK.OUT, recursive = TRUE)
if (!dir.exists(PLOT.OUT)) dir.create(PLOT.OUT, recursive = TRUE)

# Load the hourly generation results for every long,lat combination (a 4kW
# system modeled in SAM, split over six CSV chunks), combine them into a single
# data frame, and store the result both as a csv file and as an .Rda file.

chunk.files <- c(
  "results_CEC_0-4999.csv",
  "results_CEC_5000-9999.csv",
  "results_CEC_10000-14999.csv",
  "results_CEC_15000-19999.csv",
  "results_CEC_20000-24999.csv",
  "results_CEC_25000-28656.csv"
)

# One index value per hour of a 365-day year; it is prepended to every chunk
# so that the chunks share a column to be merged on.
hour <- seq_len(24 * 365)

chunks <- lapply(chunk.files, function(chunk.file) {
  cbind(hour = hour, read.csv(file.path(WORK.OUT, chunk.file)))
})

# Fold the chunks together left to right. merge() joins on every column name
# the two inputs have in common.
mydata <- Reduce(merge, chunks)

# Drop the temporary objects; only the merged data frame is needed below.
rm("chunk.files", "chunks")

# The first column of the merged frame is the hour key; remove it so that
# only the generation columns remain.
mydata <- mydata[, -1]
write.csv(mydata, file.path(WORK.OUT, "solargendata_CEC.csv"))
save(mydata, file = file.path(WORK.OUT, "solargendata_CEC.Rda"))


#transposedata <- t(mydata)
# drop hour row for ease of data manipulation, will add back later
#transposedata <- transposedata[-1, ]

# The column names of mydata come from the weather file names, which contain
# the latitude and longitude of each simulated location.
# NOTE(review): the patterns below presumably expect names shaped like
# "..._<lat>_.<lon>_tmy..." (R replaces a leading "-" with ".") -- confirm
# against the actual CSV headers.
filenames <- colnames(mydata)

# Longitude: cut off "_tmy" and everything after it, then strip everything up
# to the last underscore plus the single character that follows it. The
# remaining digits are parsed and given a negative sign.
long <- -1 * as.numeric(sub(".*_.", "", sub("_tmy.*", "", filenames)))

# Latitude: cut off everything from the first "_." onward, then strip
# everything up to the last remaining underscore.
lat <- as.numeric(sub(".*_", "", sub("_\\..*", "", filenames)))

library(geosphere)

# Two-column matrix (long, lat), one row per column of mydata.
coordinates <- cbind(long, lat)
save(coordinates, file = file.path(WORK.OUT, "coordinates_CEC.Rda"))

# Zip code centroids: column 1 is the zip code, column 2 the longitude and
# column 3 the latitude.
zip.geo <- read.csv(
  file.path(DATA.DIRECTORY, "zipcode_centroid_latlon_3-27-2018.csv")
)
zip.coordinates <- cbind(
  zip.geo$ZIP_CODE,
  zip.geo$longitude,
  zip.geo$latitude
)
save(zip.coordinates, file = file.path(WORK.OUT, "zip_coordinates_CEC.Rda"))

# Form the distance matrix. Row i, column j holds the distance from the i-th
# entry of zip.coordinates (columns 2:3 = longitude, latitude) to the j-th
# entry of coordinates (the locations of the observed weather data).
# drop = FALSE keeps both inputs as matrices even when there is a single row.
D <- distm(
  zip.coordinates[, 2:3, drop = FALSE],
  coordinates[, 1:2, drop = FALSE]
)
save(D, file = file.path(WORK.OUT, "distance_matrix_CEC.Rda"))

# For each zip code (row of D) find the column index of the smallest distance.
# This is computed once and reused below instead of scanning D twice.
rowmins <- apply(D, 1, which.min)
save(rowmins, file = file.path(WORK.OUT, "rowmin_indices_CEC.Rda"))

# Use those indices to look up the nearest weather coordinates and bind them
# next to the zip code rows. drop = FALSE keeps the looked-up coordinates a
# two-column matrix even for a single zip code, so cbind lines the rows up.
matched.coordinates <- cbind(
  zip.coordinates,
  coordinates[rowmins, , drop = FALSE]
)
save(
  matched.coordinates,
  file = file.path(WORK.OUT, "matched_coordinates_CEC.Rda")
)


# Sanity check: count the distinct matched weather coordinates (columns 4:5 of
# matched.coordinates). drop = FALSE keeps a matrix even for a single row so
# that nrow() always returns a number.
check.uniqueness <- matched.coordinates[, 4:5, drop = FALSE]
nrow(unique(check.uniqueness))
# When this was originally run it reported 28657 distinct long,lat
# combinations, which is exactly the number of weather files we have.

# NOTE(review): transpose.coordinates is not referenced by any later statement
# visible in this script; it is kept in case other code relies on it.
transpose.coordinates <- t(coordinates)

# Perform the final merge: pick, for every zip code, the generation column of
# its nearest weather location, then transpose so that each row is a zip code
# and each column an hour.
finaldata <- mydata[, rowmins, drop = FALSE]

# Only the zip code (column 1 of matched.coordinates) is kept as an identifier
# in front of the hourly values. Binding just that column avoids building the
# full matrix with all five coordinate columns and then copying it again to
# delete four of them, and it keeps the result a matrix for a single zip code.
# To verify the matching, inspect matched.coordinates next to t(finaldata)
# before this step.
transformfinaldata <- cbind(matched.coordinates[, 1], t(finaldata))
transformfinaldata <- unname(transformfinaldata)
save(
  transformfinaldata,
  file = file.path(WORK.OUT, "solargen_zip_hour_CEC.Rda")
)
write.csv(transformfinaldata, file.path(WORK.OUT, "solargen_zip_hour_CEC.csv"))
